# -*- coding:utf-8 -*-

import requests
from lxml import etree
from fake_useragent import UserAgent
from http import cookiejar
import json
from datetime import datetime
import time


requests.packages.urllib3.disable_warnings()  # Suppress urllib3's HTTPS security warnings

"""
链家-二手房
https://bj.lianjia.com/ershoufang/
"""


class Test_Get():
    """Scraper for second-hand housing listings on bj.lianjia.com.

    ``get_contents`` downloads one page of the listing index, prints a summary
    of every listing found on it and follows each listing's link through
    ``get_detail`` to collect the property's basic attributes.
    """

    # URL pattern of the paginated listing index; ``{page}`` is the page number.
    _LIST_URL = "https://bj.lianjia.com/ershoufang/pg{page}/"

    # Seconds to wait for the server before a request is abandoned.
    _TIMEOUT = 10

    # Cookie header value, one cookie per literal (joined by implicit
    # concatenation so the value stays on a single logical line).
    # NOTE(review): this looks like a captured browser session and has very
    # likely expired; consider loading it from configuration instead of
    # keeping it in source.
    _COOKIE = (
        "TY_SESSION_ID=dd82ed1b-9252-4e1e-b91c-fe49b1d6cb61; "
        "ljref=pc_sem_baidu_ppzq_x; "
        "digv_extends=%7B%22utmTrackId%22%3A%2221583074%22%7D; "
        "lianjia_ssid=6666c17a-4aba-41d7-a787-8aa56a7db3d6; "
        "lianjia_uuid=d5a03dbc-bfc9-4cd7-b254-c9567b6ca2b3; "
        "_jzqc=1; "
        "_jzqa=1.2296432453478440400.1580481835.1580481835.1580481835.1; "
        "_jzqckmp=1; "
        "_jzqy=1.1580481835.1580481835.1.jzqsr=baidu|jzqct=lianjia.-; "
        "UM_distinctid=16ffc0ef10584f-043a247dd37ee8-b383f66-144000-16ffc0ef10783a; "
        "CNZZDATA1253477573=1414829099-1580478557-https%253A%252F%252Fsp0.baidu.com%252F%7C1580478557; "
        "_qzjc=1; "
        "_smt_uid=5e343d2b.346f7802; "
        "CNZZDATA1254525948=5771172-1580476672-https%253A%252F%252Fsp0.baidu.com%252F%7C1580476672; "
        "CNZZDATA1255633284=1677200805-1580476967-https%253A%252F%252Fsp0.baidu.com%252F%7C1580476967; "
        "CNZZDATA1255604082=1374746377-1580478781-https%253A%252F%252Fsp0.baidu.com%252F%7C1580478781; "
        "sajssdk_2015_cross_new_user=1; "
        "sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%2216ffc0ef22a880-0959c2b193b881-b383f66-1327104-16ffc0ef22c9d2%22%2C%22%24device_id%22%3A%2216ffc0ef22a880-0959c2b193b881-b383f66-1327104-16ffc0ef22c9d2%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E4%BB%98%E8%B4%B9%E5%B9%BF%E5%91%8A%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fsp0.baidu.com%2F9q9JcDHa2gU2pMbgoY3K%2Fadrc.php%3Ft%3D06KL00c00fZg9KY0DvNb0nVfAs0h5HwX000005tSQ7C00000ILw5vs.THd_py78ph-90A3qmh7GuZR0T1dbmHfvmhuhmW0snAcsPWIW0ZRqwHmYnDwKfWuArjNAf161nHw7rjwjnW0knbnkPbc%22%2C%22%24latest_referrer_host%22%3A%22sp0.baidu.com%22%2C%22%24latest_search_keyword%22%3A%22lianjia%22%2C%22%24latest_utm_source%22%3A%22baidu%22%2C%22%24latest_utm_medium%22%3A%22pinzhuan%22%2C%22%24latest_utm_campaign%22%3A%22sousuo%22%2C%22%24latest_utm_content%22%3A%22biaotimiaoshu%22%2C%22%24latest_utm_term%22%3A%22biaoti%22%7D%7D; "
        "_ga=GA1.2.2009725374.1580481837; "
        "_gid=GA1.2.997581348.1580481837; "
        "select_city=110000; "
        "Hm_lvt_9152f8221cb6243a53c83b956842be8a=1580481885; "
        "Hm_lpvt_9152f8221cb6243a53c83b956842be8a=1580481885; "
        "_qzja=1.1665922728.1580481835283.1580481835283.1580481835284.1580481835284.1580481885605.0.0.0.2.1; "
        "_qzjb=1.1580481835284.2.0.0.0; "
        "_qzjto=2.1.0; "
        "_jzqb=1.2.10.1580481835.1"
    )

    # (result key, position in the detail page's attribute list) pairs.
    _DETAIL_FIELDS = (
        ("huxing", 0),     # layout
        ("louceng", 1),    # floor
        ("mianji", 3),     # built area
        ("leixing", 4),    # building type
        ("chaoxiang", 5),  # orientation
        ("jiegou", 6),     # building structure
    )

    def __init__(self):
        """Create the cookie jar and the browser-like request headers."""
        # CookieJar instance intended for holding cookies; it is not read
        # anywhere else in this class.
        self.cookie = cookiejar.CookieJar()
        self.headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Connection": "keep-alive",
            "Cookie": self._COOKIE,
            "DNT": "1",
            "Host": "bj.lianjia.com",
            "Referer": "https://bj.lianjia.com/ershoufang/",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "same-origin",
            "Sec-Fetch-User": "?1",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"
        }

    @classmethod
    def _page_url(cls, current):
        """Return the listing-index URL for page number *current*."""
        return cls._LIST_URL.format(page=current)

    @staticmethod
    def _first(values, default=""):
        """Return the first item of *values*, or *default* when it is empty."""
        return values[0] if values else default

    @staticmethod
    def _split_follow(follow):
        """Split the "followers / publish time" text into its two parts.

        Returns a ``(followers, published)`` tuple of stripped strings; the
        second item is ``""`` when *follow* contains no ``/`` separator.
        """
        parts = follow.split("/")
        followers = parts[0].strip()
        published = parts[1].strip() if len(parts) > 1 else ""
        return followers, published

    def _fetch_tree(self, url):
        """Download *url* with the instance headers and parse it as HTML.

        Raises:
            requests.RequestException: on connection problems, timeouts or a
                non-success HTTP status.
        """
        response = requests.get(url, headers=self.headers, timeout=self._TIMEOUT)
        response.raise_for_status()
        return etree.HTML(response.text)

    def get_contents(self, current):
        """Scrape listing page number *current*.

        Every listing found is printed, its detail page is fetched through
        ``get_detail`` and the detail dict is printed as well.

        Args:
            current: Page number of the listing index to download.

        Returns:
            A list with one dict per listing. Each dict holds the keys
            ``title``, ``address``, ``type``, ``follow``, ``time``,
            ``price_total``, ``price_unit``, any ``tag_<n>`` entries and
            ``detail`` (the dict returned by ``get_detail``).
        """
        listings = []
        root = self._fetch_tree(self._page_url(current))
        if root is None:
            # Defensive: nothing parseable came back for this page.
            return listings

        for s in root.xpath('//li[@class="clear LOGVIEWDATA LOGCLICKDATA"]'):
            url = self._first(s.xpath('./div[1]/div[@class="title"]/a/@href'))
            if not url:
                # Without a link there is no detail page to follow; skip it.
                continue

            dict_data = dict()
            dict_data['title'] = self._first(s.xpath('./div[1]/div[@class="title"]/a/text()'))  # title
            dict_data['address'] = "".join(s.xpath('./div[1]/div[@class="flood"]//text()')).strip()  # address
            dict_data['type'] = "".join(s.xpath('./div[1]/div[@class="address"]//text()')).strip()  # house type
            follow = "".join(s.xpath('./div[1]/div[@class="followInfo"]//text()'))
            # follower count and publish time share one "a / b" text node
            dict_data['follow'], dict_data['time'] = self._split_follow(follow)
            dict_data['price_total'] = "".join(s.xpath('./div[1]/div[@class="priceInfo"]/div[1]//text()'))  # total price
            dict_data['price_unit'] = "".join(s.xpath('./div[1]/div[@class="priceInfo"]/div[2]//text()'))  # unit price
            # tags
            for num, d in enumerate(s.xpath('./div[1]/div[@class="tag"]/span/text()')):
                dict_data['tag_' + str(num)] = d
            print(str(dict_data))

            detail = self.get_detail(url)
            print(str(detail))
            dict_data['detail'] = detail
            listings.append(dict_data)
        return listings

    # Property details
    def get_detail(self, url):
        """Fetch a listing's detail page and extract its basic attributes.

        Args:
            url: Absolute URL of the listing's detail page.

        Returns:
            A dict with the keys ``huxing``, ``louceng``, ``mianji``,
            ``leixing``, ``chaoxiang`` and ``jiegou``. A value is ``""`` when
            the corresponding entry is missing from the page.
        """
        root = self._fetch_tree(url)
        selector = root.xpath('//div[@class="content"]/ul/li') if root is not None else []
        dict_data = dict()
        for key, index in self._DETAIL_FIELDS:
            # Guard the positional lookup so a shorter list cannot raise.
            texts = selector[index].xpath('./text()') if index < len(selector) else []
            dict_data[key] = self._first(texts)
        return dict_data


if __name__ == '__main__':
    # Walk listing pages 1 through 99, echoing each page number before that
    # page is handed to the scraper.
    FIRST_PAGE, LAST_PAGE = 1, 99
    scraper = Test_Get()
    for page_no in range(FIRST_PAGE, LAST_PAGE + 1):
        print(page_no)
        scraper.get_contents(page_no)